#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :get_sight_info.py
# @Time      :2023/10/25 
# @Author    :CL
# @email     :1037654919@qq.com

#  todo
import time


import requests
from bs4 import BeautifulSoup
from retrying import retry
from utils import mongo_manager,get_kuai_proxy
# Handle returned by mongo_manager for 'lvmama_sight_info' in db 'lvyou'
# (presumably a MongoDB collection wrapper), created at import time.
# NOTE(review): main() builds its own local handle under the same name, so
# this module-level one is not referenced elsewhere in this file and is never
# closed here -- confirm whether other modules import it before removing.
lvmama_sight_info = mongo_manager('lvmama_sight_info',db ='lvyou')
@retry(stop_max_attempt_number=3)
def get_sight_info(url=None):
    """Download the HTML of a lvmama sight detail page.

    The request is sent with a fixed set of browser-like headers and cookies.
    The ``retry`` decorator re-invokes the function (up to 3 attempts in
    total) when it raises, e.g. on a connection error or timeout. A response
    with a non-200 status does not raise, so it is NOT retried.

    Args:
        url: Page URL to fetch. When ``None``, a built-in sample sight page
            is requested instead.

    Returns:
        The response body as text when the status code is 200, otherwise
        ``None``.

    Raises:
        Whatever ``requests.get`` raises, once the retry attempts are used up.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "Proxy-Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }
    # NOTE(review): these look like cookies captured from a browser session;
    # they are hard-coded and may expire -- confirm they are still required.
    cookies = {
        "_ip_city_name": "%E4%B8%8A%E6%B5%B7",
        "_ip_city_place_id": "310000",
        "_ip_province_place_id": "310000",
        "uid": "wKgKb2U4c6DB6FY8N2IDAg==",
        "lvsessionid": "5e91e5ff-40d7-44d8-80c7-93b29dc8e917_16864194",
        "BDCCOOKIEID": "hrBhePbj3980950582",
        "BDCSESSIONID": "rSjdsWfeSHy1698217649443",
        "CoreID6": "88359017514216982176497&ci=52710000|PC",
        "Hm_lvt_cb09ebb4692b521604e77f4bf0a61013": "1698217650",
        "fp_ver": "4.5.1",
        "_ga": "GA1.2.481363095.1698217651",
        "_jzqc": "1",
        "PHPSESSID": "21rbooo9huibt9r0drchq3h2h0",
        "cmTPSet": "Y",
        "_lvTrack_u_ud": "B014A4D3-920F-4CA4-A91C-ACAE28A90E01",
        "_qzjc": "1",
        "LVMM_NODE_ENV": "production",
        "oIC": "233198233198233198233198",
        "oIT": "0925092509250925",
        "_jzqx": "1.1698299990.1698368310.3.jzqsr=lvmama%2Ecom|jzqct=/lvyou/d-beijing1%2Ehtml.jzqsr=lvmama%2Ecom|jzqct=/",
        "sensorsdata2015jssdkcross": "%7B%22distinct_id%22%3A%2218b6483ae6e583-00a9889c6d2904-13462c6c-2073600-18b6483ae6f94b%22%2C%22%24device_id%22%3A%2218b6483ae6e583-00a9889c6d2904-13462c6c-2073600-18b6483ae6f94b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D",
        "_lvTrack_u_sd": "3017C82B-C376-4271-8D44-5480EC39CC23",
        "_lvTrack_firstVisitTime": "1698368344453",
        "_lvTrack_preVisitTime": "1698368344455",
        "__xsptplus443": "443.4.1698368611.1698368611.1%234%7C%7C%7C%7C%7C%23%23%23",
        "_jzqckmp": "1",
        "_gid": "GA1.2.184386355.1698631595",
        "52710000|PC_clogin": "v=37&l=1698633861&e=1698635661976",
        "_gat": "1",
        "_qzja": "1.402745185.1698218091536.1698631594340.1698633862054.1698631594340.1698633862054.0.0.0.47.11",
        "_qzjto": "2.2.0",
        "_jzqa": "1.478980885840175800.1698217652.1698631595.1698633862.11",
        "jXVJUTNgMEfp6rEr": "e9kgRdjy2rZE%2F2EOG09KS%2F04juP4BSfX1y%2F4Efk%2B9GuFp0UM35pSOqtvKmC4xAmXnrdmeWhm74o3jBTgvd5SaeeO9GQ98UmvbS8LBhdE9F1OdUtWPZBPhCioZpw4DA7FX6CRoSDG5SzOPppbV5Cjg%2FM4uzNvsreyMRmPiCKampdtpMjCeIhaVhvVZkeHapfzjNSHLvRmsjyiBLSVigUdChdUwm8fdlxANtWm0IawnyPAajy3B2V4UNUdT%2Bl1AZH6pvEViy8HFoljycI1%2BCbeHlIaaCiBgW8ZjxcrEJPpruYJB13IqIQZdzld9XiWe7qMlAfSps24b0tGem46OpZptDwaCwFo35u%2BFB0XC%2BWQlPmncKTdLyL%2BNh84lZ6k%2FWNAkQn%2B6nx908ogPDLZqtFjge4ECeKrcJRlTwWZC015cw8%3D3d23cf05d90337db1c8440f900a7550a0f8e6cf2",
        "_qzjb": "1.1698633862054.1.0.0.0",
        "_jzqb": "1.1.10.1698633862.1",
        "Hm_lpvt_cb09ebb4692b521604e77f4bf0a61013": "1698633862",
        "JSESSIONID": "AD514068E008FFF8F4A344CD2E783595",
        "_fmdata": "Lgnmwvb9UZV3dOw7baJRkFLIcqZ0qBmuoEw2PbzMKQxBloJQrAqlHMCiqafNRvHdSrGA%2BalYk7%2FffJrGpBbh5AxkoSQrgvI5S%2BxUyNwwr%2BM%3D",
        "_ga_NHEPS49S4W": "GS1.2.1698633863.11.0.1698633863.0.0.0"
    }
    # Identity check: `None` is a singleton, so `is` is the correct test.
    if url is None:
        url = "http://www.lvmama.com/lvyou/poi/sight-175076.html"

    # verify=False turns off TLS certificate verification; it only matters
    # when an https URL is passed in.
    response = requests.get(url, headers=headers, cookies=cookies, timeout=20, verify=False)
    print(url, response)
    if response.status_code == 200:
        return response.text
    # Non-200: signal "no page" to the caller without raising, so the retry
    # decorator does not fire for it.
    return None


def _parse_sight_info(html):
    """Extract the labelled sections of a sight page into a dict.

    Looks up the ``div`` with class ``city_view_model`` and, inside it, every
    ``div`` matched by ``class_='mainList poi_heightbox js-content'``. For
    each one, the text before the first triple newline is split on double
    newlines; element 1 becomes the key and element 2 the value (presumably
    the section heading and its body -- this depends on the page markup).

    Args:
        html: Page source as text.

    Returns:
        A dict of the extracted pairs; empty when no section qualifies.

    Raises:
        AttributeError: when the ``city_view_model`` container is absent
            (``find`` returns ``None``). This is left to propagate on purpose
            so the caller can tell "unexpected page" apart from "no sections".
    """
    soup = BeautifulSoup(html, 'lxml')
    sections = soup.find('div', class_='city_view_model').find_all(
        'div', class_='mainList poi_heightbox js-content')
    info = {}
    for section in sections:
        parts = section.text.split('\n\n\n')[0].split('\n\n')
        print(parts[1:])
        # Sections with fewer than three parts carry no key/value pair.
        if len(parts) >= 3:
            info[parts[1]] = parts[2]
    return info


def main():
    """Fetch and store info for every seed still marked as failed.

    Seeds are read with the filter ``{'info': None, 'status': 'fail'}``; each
    seed's ``_id`` is used as the page URL. After processing, the seed is
    written back with ``updateOne``:

    * page fetched and at least one pair extracted -> ``info`` set,
      ``status`` = ``'success'``;
    * page fetched but nothing extracted, or non-200 response -> ``info``
      set to ``{}``, ``status`` stays ``'fail'``;
    * fetch or parse raised -> the error is printed and ``info`` is left
      untouched, so the seed still matches the selection filter.
    """
    # Phase 1: snapshot the pending seeds, then release the handle.
    # NOTE(review): the close/re-open between the two phases is kept from the
    # original code; whether mongo_manager needs it is not visible here.
    collection = mongo_manager('lvmama_sight_info', db='lvyou')
    try:
        pending = list(collection.findAll({'info': None, 'status': 'fail'}))
    finally:
        collection.close()
    print(len(pending))

    # Phase 2: process each seed and persist the outcome.
    collection = mongo_manager('lvmama_sight_info', db='lvyou')
    try:
        for seed in pending:
            url = seed['_id']
            print(seed)
            seed['status'] = 'fail'
            try:
                html = get_sight_info(url=url)
                info = _parse_sight_info(html) if html else {}
                seed['info'] = info
                if info:
                    seed['status'] = 'success'
            except Exception as e:
                # Only ordinary errors are swallowed; KeyboardInterrupt and
                # SystemExit now propagate so the crawl can be stopped.
                print(e)
            collection.updateOne({"_id": seed['_id']}, seed)
    finally:
        # Always release the handle, even when the loop is interrupted.
        collection.close()

if __name__ == '__main__':
    # Smoke test: fetch the built-in sample page and print the parts that
    # would be extracted from each section, before running the real job.
    try:
        res = get_sight_info()
        if res:
            soups = BeautifulSoup(res, 'lxml')
            datas = soups.find('div', class_='city_view_model').find_all('div', class_='mainList poi_heightbox js-content')
            for data in datas:
                lists = data.text.split('\n\n\n')[0].split('\n\n')
                print(lists[1:])
    except Exception as e:
        # The smoke test is best-effort and must not block main(), but the
        # failure is reported instead of being silently discarded. Catching
        # Exception (not a bare except) lets Ctrl-C abort the script.
        print(e)
    print()
    main()



